import sys
import os
import codecs
import xmlrpclib
import urllib
import urllib2
from BeautifulSoup import BeautifulStoneSoup,Tag,CData
import re
import logging
from datetime import datetime
from datetime import timedelta
import time
from optparse import OptionParser
from string import Template
import pickle
import xml
from xml.sax import saxutils

from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app

class MainPage(webapp.RequestHandler):
	"""Request handler for "/": serves the upload form and performs the conversion."""

	def get(self):
		"""Write the static HTML page containing the upload form and instructions."""
		self.response.out.write("""
														<html>
															<head><title>Blogbus to WordPress</title></head>
															<body>
																<h1>Blogbus2WordPress</h1>
																<form action="/" method="post" enctype="multipart/form-data">
																	<div><label>Upload Blogbus export XML</label></div>
																	<div><input type="file" name="blogbusXml"/></div>
																	<div><input type="submit" value="Convert"></div>
																</form>
																<h2>Instructions</h2>
																<ul>
																	<li>Set Blogbus and WordPress blog to the same time zone.</li>
																	<li>Get the Blogbus backup XML file.</li>
																	<li>Fill the path here and hit "Convert" button, download the XML file for WordPress.</li>
																	<li>Selecting "WordPress" type and import the WordPress XML on WordPress import page, .</li>
																</ul>
																<h2>Code</h2>
																<a href="http://code.google.com/p/blogbus-to-wordpress/">Project hosted on Google Code</a>
																<h2>Author</h2>
																<a href="http://weiwei9.com">Wei Wei</a>
															</body>
														</html>""")

	def post(self):
		"""Convert the uploaded 'blogbusXml' field and return the result as a download.

		The response is sent as an attachment named B2W-<MMDDYYYY-HHMM>.xml,
		stamped with the server's current local time.
		"""
		blogbusXml = self.request.get('blogbusXml')
		result = convert(blogbusXml)
		timestamp = datetime.now().strftime('%m%d%Y-%H%M')
		self.response.headers['Content-Type'] = 'application/rss+xml'
		self.response.headers['Content-Disposition'] = 'attachment; filename="B2W-' + timestamp + '.xml"'
		self.response.out.write(result)

# WSGI application object: a single route mapping "/" to MainPage, created with debug=True.
application = webapp.WSGIApplication([('/', MainPage)], debug=True)

def exportHead(dic,tags):
	"""Render the opening part of the WXR document (XML prolog up to the channel header).

	dic  -- mapping that must provide 'blogTitle', 'nowTime' and 'blogURL'.
	        It is not modified; escaping is applied to a private copy so the
	        function can be called repeatedly with the same mapping.
	tags -- accepted for interface compatibility; currently unused.

	Returns the rendered header text. Raises KeyError if a required key is
	missing from dic.
	"""
	t = Template(u"""<?xml version="1.0" encoding="UTF-8"?>
							 <!--
							 This is a WordPress eXtended RSS file generated by Live Space Mover as an export of 
							 your blog. It contains information about your blog's posts, comments, and 
							 categories. You may use this file to transfer that content from one site to 
							 another. This file is not intended to serve as a complete backup of your 
							 blog.

							 To import this information into a WordPress blog follow these steps:

							 1.	Log into that blog as an administrator.
							 2.	Go to Manage > Import in the blog's admin.
							 3.	Choose "WordPress" from the list of importers.
							 4.	Upload this file using the form provided on that page.
							 5.	You will first be asked to map the authors in this export file to users 
							 on the blog. For each author, you may choose to map an existing user on 
							 the blog or to create a new user.
							 6.	WordPress will then import each of the posts, comments, and categories 
							 contained in this file onto your blog.
							 -->

							 <!-- generator="Blogbus to WordPress 1.0" created="${nowTime}"-->
							 <rss version="2.0"
							 xmlns:excerpt="http://wordpress.org/export/1.1/excerpt/"
							 xmlns:content="http://purl.org/rss/1.0/modules/content/"
							 xmlns:wfw="http://wellformedweb.org/CommentAPI/"
							 xmlns:dc="http://purl.org/dc/elements/1.1/"
							 xmlns:wp="http://wordpress.org/export/1.0/"
							 >

							 <channel>
							 <title>${blogTitle}</title>
							 <link>${blogURL}</link>
							 <description></description>
							 <pubDate>Tue, 30 Nov 1999 00:00:00 +0000</pubDate>
							 <generator>Blogbus to WordPress 1.0</generator>
							 <language>en</language>
							 <wp:wxr_version>1.1</wp:wxr_version>
							 <wp:base_site_url>http://${blogURL}</wp:base_site_url>
							 <wp:base_blog_url>http://${blogURL}</wp:base_blog_url>
							 """) #need blogTitle, nowTime, blogURL
	# Work on a copy: escaping in place would corrupt the caller's data and
	# double-escape the title if the same dict were rendered twice.
	values = dict(dic)
	# Both values end up as XML character data; a missing (None) value is
	# rendered as an empty string instead of crashing inside escape().
	values['blogTitle'] = saxutils.escape(values['blogTitle'] or u'')
	values['blogURL'] = saxutils.escape(values['blogURL'] or u'')
	return t.substitute(values)

def _cdataBody(text):
	"""Return text (None becomes empty) made safe for use inside a CDATA section.

	A literal "]]>" would terminate the section early, so it is split across
	two adjacent CDATA sections; parsers reassemble the original text.
	"""
	return (text or u'').replace(u']]>', u']]]]><![CDATA[>')


def exportEntries(entries):
	"""Render every entry as a WXR <item> element and return the concatenated text.

	entries -- iterable of dicts with the keys 'title', 'date', 'content',
	           'tags' (list of strings), 'category' and 'comments'. Each
	           comment is a dict with 'author', 'email', 'url', 'date' and
	           'comment'.

	Post ids and comment ids both start at 10000 and count downwards; comment
	ids keep decreasing across entries. Text placed in element content or in
	attributes is XML-escaped, and text placed in CDATA sections is protected
	against an embedded "]]>".
	"""
	commentT = Template(u"""<wp:comment>
											<wp:comment_id>${commentId}</wp:comment_id>
											<wp:comment_author>${commentAuthor}</wp:comment_author>
											<wp:comment_author_email>${commentEmail}</wp:comment_author_email>
											<wp:comment_author_url>${commentURL}</wp:comment_author_url>
											<wp:comment_author_ip></wp:comment_author_ip>
											<wp:comment_date>${commentDate}</wp:comment_date>
											<wp:comment_date_gmt>0000-00-00 00:00:00</wp:comment_date_gmt>
											<wp:comment_content>${commentContent}</wp:comment_content>
											<wp:comment_approved>1</wp:comment_approved>
											<wp:comment_type></wp:comment_type>
											<wp:comment_parent>0</wp:comment_parent>
											</wp:comment>""") #need commentid, commentauthor, commentemail, commenturl,commentdate,commentcontent
	itemT = Template(u"""<item>
									 <title>${entryTitle}</title>
									 <link>${entryURL}</link>
									 <pubdate>${pubDate}</pubdate>
									 <dc:creator>${entryAuthor}</dc:creator>
									 <guid isPermalink="false">${entryURL}</guid>
									 <description></description>
									 <content:encoded><![CDATA[${entryContent}]]></content:encoded>
									 <excerpt:encoded></excerpt:encoded>
									 <wp:post_id>${entryId}</wp:post_id>
									 <wp:post_date>${postDate}</wp:post_date>
									 <wp:post_date_gmt>0000-00-00 00:00:00</wp:post_date_gmt>
									 <wp:comment_status>open</wp:comment_status>
									 <wp:ping_status>open</wp:ping_status>
									 <wp:post_name>${entryTitle}</wp:post_name>
									 <wp:status>publish</wp:status>
									 <wp:post_parent>0</wp:post_parent>
									 <wp:menu_order>0</wp:menu_order>
									 <wp:post_type>post</wp:post_type>
									 ${categoryPart}
									 ${tagPart}
									 ${comments}
									 </item>
									 """) #need entryTitle, entryURL, entryAuthor, entryContent, entryId, postDate, pubDate
	tagT = Template(u"<category domain=\"post_tag\" nicename=\"${tagNicename}\"><![CDATA[${tag}]]></category>\n")	# need tag
	categoryT = Template(u"<category domain=\"category\" nicename=\"${categoryNicename}\"><![CDATA[${category}]]></category>\n")	# need tag
	# saxutils.escape() leaves double quotes alone by default, which would
	# break the double-quoted nicename attributes.
	attrEntities = {u'"': u'&quot;'}
	commentId = 10000
	entryId = 10000
	items = []
	for entry in entries:
		title = entry['title'] or u''
		# Lazy %-formatting: no string concatenation on a possibly odd title.
		logging.debug("exporting entry %s", title)

		tagStr = u''.join(
			tagT.substitute(tag=_cdataBody(tag),
							tagNicename=saxutils.escape(tag, attrEntities))
			for tag in entry['tags'])

		categoryStr = u''
		if entry['category']:
			categoryStr = categoryT.substitute(
				category=_cdataBody(entry['category']),
				categoryNicename=saxutils.escape(entry['category'], attrEntities))

		commentParts = []
		for comment in entry['comments']:
			# The body goes into its own CDATA section so markup in the
			# comment cannot be mistaken for XML elements.
			commentBody = u'<![CDATA[' + _cdataBody(comment['comment']) + u']]>'
			commentParts.append(commentT.substitute(
				commentId=commentId,
				commentAuthor=saxutils.escape(comment['author']),
				commentEmail=saxutils.escape(comment['email']),
				# URLs routinely contain "&", which must be escaped.
				commentURL=saxutils.escape(comment['url']),
				commentDate=comment['date'],
				commentContent=commentBody))
			commentId -= 1

		items.append(itemT.substitute(
			entryTitle=saxutils.escape(title),
			entryURL="http://fake.com/" + str(entryId),
			entryAuthor='export_user',
			entryContent=_cdataBody(entry['content']),
			entryId=entryId,
			postDate=entry['date'],
			pubDate=entry['date'],
			tagPart=tagStr,
			categoryPart=categoryStr,
			comments=u''.join(commentParts)))
		entryId -= 1
	return u''.join(items)

def exportFoot():
	"""Return the text that closes the <channel> and <rss> elements of the export."""
	footer = """
					</channel>
					</rss>
					"""
	return footer

def convertEntries(soup, entries, tags):
	"""
	Read every <log> element under soup and append one entry dict per log.

	soup    -- parsed export tree; must offer findAll('log') and attribute
	           access to child elements whose text is available as .string
	entries -- list that receives the converted entries (appended in place)
	tags    -- set that is updated in place with every tag encountered

	Structure of entry
	entry
	|-date
	|-title
	|-content
	|-category
	|-tags []
	|-comments []
			|-url
			|-email
			|-author
			|-comment
			|-date

	Title, content and all comment fields fall back to an empty string when
	the source element has no text.
	"""
	for log in soup.findAll('log'):
		logging.info("Converting log %s", log.title.string)
		entry = {'tags' : []}
		entries.append(entry)
		entry['date'] = log.logdate.string
		# An element without text yields None; store an empty string instead,
		# as is already done for the content.
		entry['title'] = log.title.string or u''
		entry['content'] = log.content.string or u''
		if log.tags.string :
			# Doubled or trailing spaces would otherwise produce empty tags.
			entry['tags'] = [tag for tag in log.tags.string.split(' ') if tag]
		tags.update(entry['tags'])
		entry['category'] = log.sort.string
		entry['comments'] = []
		for comment in log.comments.findAll('comment'):
			c = {
				'author' : comment.nicename.string or '',
				'email' : comment.email.string or '',
				'comment' : comment.commenttext.string or '',
				'url' : comment.homepage.string or '',
				'date' : comment.createtime.string or '',
			}
			entry['comments'].append(c)

def convert(blogbusXml):
	"""Convert a Blogbus export document into WXR text for the WordPress importer.

	blogbusXml -- the Blogbus export XML as passed to BeautifulStoneSoup.

	Returns the complete export: header, one <item> per log, then the footer.
	Raises ValueError when the document has no 'blogbuscom' element.
	"""
	soup = BeautifulStoneSoup(blogbusXml).blogbuscom
	if soup is None:
		# Without this check the next attribute access fails with an opaque
		# AttributeError on None (e.g. for an empty or unrelated upload).
		raise ValueError("Not a Blogbus export: no 'blogbuscom' element found")

	blogInfoDic = {
		'nowTime': datetime.now().strftime('%Y-%m-%d %H:%M'),
		'blogTitle': soup.description.blogname.string,
		'blogURL': soup.description.domainname.string,
	}
	logging.debug('Blog Title is %s',blogInfoDic['blogTitle'])

	entries = []
	tags = set([])
	convertEntries(soup, entries, tags)

	parts = [exportHead(blogInfoDic, tags)]
	logging.debug('Exported header')
	parts.append(exportEntries(entries))
	parts.append(exportFoot())
	logging.debug('Exported footer')
	logging.info("Finished! Congratulations!")
	return ''.join(parts)

def main():
	"""Set the root logger to DEBUG and hand the WSGI application to run_wsgi_app."""
	rootLogger = logging.getLogger()
	rootLogger.setLevel(logging.DEBUG)
	run_wsgi_app(application)

# Start the application only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
	main()

